setwd("~/Dropbox/clado-manuscript/Mikes_MS_Data/")
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
# Load biom file. 
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
head(sam.data)
##        TreatmentGroup  Site Date                       Description
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
##        SampleID.1
## C172N1     C172N1
## C172N2     C172N2
## C172N3     C172N3
## C172P1     C172P1
## C172P2     C172P2
## C172P3     C172P3
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
head(sam.data); str(sam.data)
##        TreatmentGroup  Site Date                       Description
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
##        SampleID.1  DateSite
## C172N1     C172N1 172 North
## C172N2     C172N2 172 North
## C172N3     C172N3 172 North
## C172P1     C172P1 172 Point
## C172P2     C172P2 172 Point
## C172P3     C172P3 172 Point
## 'data.frame':    52 obs. of  6 variables:
##  $ TreatmentGroup: Factor w/ 2 levels "Early","Late": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Site          : Factor w/ 3 levels "North","Point",..: 1 1 1 2 2 2 3 3 3 1 ...
##  $ Date          : Factor w/ 6 levels "172","178","185",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ Description   : Factor w/ 52 levels "Sample of day 172 at site North 1",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ SampleID.1    : Factor w/ 52 levels "C172N1","C172N2",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ DateSite      : chr  "172 North" "172 North" "172 North" "172 Point" ...
sample_data(biom) <- sam.data
biom; sample_data(biom)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 51928 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 51928 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 51928 tips and 51914 internal nodes ]
## Sample Data:        [52 samples by 6 sample variables]:
##        TreatmentGroup  Site Date                       Description
## C178N1          Early North  178 Sample of day 178 at site North 1
## C178P1          Early Point  178 Sample of day 178 at site Point 1
## C185P2          Early Point  185 Sample of day 185 at site Point 2
## C206N2           Late North  206 Sample of day 206 at site North 2
## C206P1           Late Point  206 Sample of day 206 at site Point 1
## C206P2           Late Point  206 Sample of day 206 at site Point 2
## C214P1           Late Point  214 Sample of day 214 at site Point 1
## C214P2           Late Point  214 Sample of day 214 at site Point 2
## C214P3           Late Point  214 Sample of day 214 at site Point 3
## C214S1           Late South  214 Sample of day 214 at site South 1
## C214S2           Late South  214 Sample of day 214 at site South 2
## C214S3           Late South  214 Sample of day 214 at site South 3
## C185P1          Early Point  185 Sample of day 185 at site Point 1
## C185P3          Early Point  185 Sample of day 185 at site Point 3
## C199P3           Late Point  199 Sample of day 199 at site Point 3
## C199S2           Late South  199 Sample of day 199 at site South 2
## C199S3           Late South  199 Sample of day 199 at site South 3
## C206N1           Late North  206 Sample of day 206 at site North 1
## C178P2          Early Point  178 Sample of day 178 at site Point 2
## C199N3           Late North  199 Sample of day 199 at site North 3
## C206S1           Late South  206 Sample of day 206 at site South 1
## C214N3           Late North  214 Sample of day 214 at site North 3
## C199N2           Late North  199 Sample of day 199 at site North 2
## C206N3           Late North  206 Sample of day 206 at site North 3
## C206S2           Late South  206 Sample of day 206 at site South 2
## C199N1           Late North  199 Sample of day 199 at site North 1
## C199P1           Late Point  199 Sample of day 199 at site Point 1
## C199S1           Late South  199 Sample of day 199 at site South 1
## C214N1           Late North  214 Sample of day 214 at site North 1
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C199P2           Late Point  199 Sample of day 199 at site Point 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172S3          Early South  172 Sample of day 172 at site South 3
## C178S2          Early South  178 Sample of day 178 at site South 2
## C178P3          Early Point  178 Sample of day 178 at site Point 3
## C178S3          Early South  178 Sample of day 178 at site South 3
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172S1          Early South  172 Sample of day 172 at site South 1
## C178N3          Early North  178 Sample of day 178 at site North 3
## C185N2          Early North  185 Sample of day 185 at site North 2
## C185N3          Early North  185 Sample of day 185 at site North 3
## C185S3          Early South  185 Sample of day 185 at site South 3
## C214N2           Late North  214 Sample of day 214 at site North 2
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C185S2          Early South  185 Sample of day 185 at site South 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
## C185N1          Early North  185 Sample of day 185 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C178S1          Early South  178 Sample of day 178 at site South 1
## C185S1          Early South  185 Sample of day 185 at site South 1
## C172S2          Early South  172 Sample of day 172 at site South 2
## C178N2          Early North  178 Sample of day 178 at site North 2
##        SampleID.1  DateSite
## C178N1     C178N1 178 North
## C178P1     C178P1 178 Point
## C185P2     C185P2 185 Point
## C206N2     C206N2 206 North
## C206P1     C206P1 206 Point
## C206P2     C206P2 206 Point
## C214P1     C214P1 214 Point
## C214P2     C214P2 214 Point
## C214P3     C214P3 214 Point
## C214S1     C214S1 214 South
## C214S2     C214S2 214 South
## C214S3     C214S3 214 South
## C185P1     C185P1 185 Point
## C185P3     C185P3 185 Point
## C199P3     C199P3 199 Point
## C199S2     C199S2 199 South
## C199S3     C199S3 199 South
## C206N1     C206N1 206 North
## C178P2     C178P2 178 Point
## C199N3     C199N3 199 North
## C206S1     C206S1 206 South
## C214N3     C214N3 214 North
## C199N2     C199N2 199 North
## C206N3     C206N3 206 North
## C206S2     C206S2 206 South
## C199N1     C199N1 199 North
## C199P1     C199P1 199 Point
## C199S1     C199S1 199 South
## C214N1     C214N1 214 North
## C172P1     C172P1 172 Point
## C199P2     C199P2 199 Point
## C172N3     C172N3 172 North
## C172S3     C172S3 172 South
## C178S2     C178S2 178 South
## C178P3     C178P3 178 Point
## C178S3     C178S3 178 South
## C172N1     C172N1 172 North
## C172S1     C172S1 172 South
## C178N3     C178N3 178 North
## C185N2     C185N2 185 North
## C185N3     C185N3 185 North
## C185S3     C185S3 185 South
## C214N2     C214N2 214 North
## C172P2     C172P2 172 Point
## C185S2     C185S2 185 South
## C172P3     C172P3 172 Point
## C185N1     C185N1 185 North
## C172N2     C172N2 172 North
## C178S1     C178S1 178 South
## C185S1     C185S1 185 South
## C172S2     C172S2 172 South
## C178N2     C178N2 178 North
head(otu_table(biom))
## OTU Table:          [6 taxa and 52 samples]
##                      taxa are rows
##                                C178N1 C178P1 C185P2 C206N2 C206P1 C206P2
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      1
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     2      0      0      6      0      4
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                   137      8      1     73      8      6
##                                C214P1 C214P2 C214P3 C214S1 C214S2 C214S3
## New.CleanUp.ReferenceOTU155901      0      0      1      0      1      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     9      4     11      2      1      0
## JQ945994.1.1399                     1      0      0      1      0      0
## EF653577.1.1339                     0      0      0      4      0      0
## JQ814729.1.1495                    11      4     27     40      8     23
##                                C185P1 C185P3 C199P3 C199S2 C199S3 C206N1
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      1      0      0      0      0
## KC551585.1.1451                     3      2      5      0      0     11
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     6      4      2     11      2     56
##                                C178P2 C199N3 C206S1 C214N3 C199N2 C206N3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     0      1      4      3      0      3
## JQ945994.1.1399                     0      0      2      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                    10     23      6     19      5    849
##                                C206S2 C199N1 C199P1 C199S1 C214N1 C172P1
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     1      0      8      2      5      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                    14     24      5      5      3      2
##                                C199P2 C172N3 C172S3 C178S2 C178P3 C178S3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     2      5      8      0      1      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     2    121     31      0      0      5
##                                C172N1 C172S1 C178N3 C185N2 C185N3 C185S3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     0      0      1      0      0      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     5     16      4      6     27      4
##                                C214N2 C172P2 C185S2 C172P3 C185N1 C172N2
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      1      0      0      0
## KC551585.1.1451                     3      2      0      2      1      6
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     4     13      1      3     22     21
##                                C178S1 C185S1 C172S2 C178N2
## New.CleanUp.ReferenceOTU155901      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0
## KC551585.1.1451                     0      1      0      3
## JQ945994.1.1399                     0      0      3      0
## EF653577.1.1339                     0      0      0      0
## JQ814729.1.1495                     2      3     21     18
# Custom plotting. 
nolegend <- theme(legend.position="none")
readabund <- labs(y="read abundance")
# Normalize by relative abundance. 
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Ordination plot. 
ordNMDS <- ordinate(biom.relabund, method="NMDS", distance="bray")
## Run 0 stress 0.1164511 
## Run 1 stress 0.1164511 
## ... Procrustes: rmse 3.272388e-06  max resid 1.660708e-05 
## ... Similar to previous best
## Run 2 stress 0.1164511 
## ... Procrustes: rmse 2.653511e-06  max resid 1.013281e-05 
## ... Similar to previous best
## Run 3 stress 0.1164511 
## ... Procrustes: rmse 4.851864e-06  max resid 2.66641e-05 
## ... Similar to previous best
## Run 4 stress 0.1164499 
## ... New best solution
## ... Procrustes: rmse 0.0002641187  max resid 0.00135479 
## ... Similar to previous best
## Run 5 stress 0.1164511 
## ... Procrustes: rmse 0.0002637999  max resid 0.001356076 
## ... Similar to previous best
## Run 6 stress 0.1164499 
## ... Procrustes: rmse 3.215999e-06  max resid 2.066861e-05 
## ... Similar to previous best
## Run 7 stress 0.1164499 
## ... New best solution
## ... Procrustes: rmse 2.24477e-06  max resid 7.595852e-06 
## ... Similar to previous best
## Run 8 stress 0.1164499 
## ... Procrustes: rmse 6.655844e-06  max resid 2.331752e-05 
## ... Similar to previous best
## Run 9 stress 0.1164511 
## ... Procrustes: rmse 0.0002639389  max resid 0.001356566 
## ... Similar to previous best
## Run 10 stress 0.1164499 
## ... Procrustes: rmse 4.751411e-06  max resid 1.984868e-05 
## ... Similar to previous best
## Run 11 stress 0.1164499 
## ... New best solution
## ... Procrustes: rmse 1.369096e-06  max resid 5.075076e-06 
## ... Similar to previous best
## Run 12 stress 0.1164499 
## ... Procrustes: rmse 2.570016e-06  max resid 1.538893e-05 
## ... Similar to previous best
## Run 13 stress 0.1164511 
## ... Procrustes: rmse 0.0002639619  max resid 0.001357598 
## ... Similar to previous best
## Run 14 stress 0.1164511 
## ... Procrustes: rmse 0.0002641559  max resid 0.001347027 
## ... Similar to previous best
## Run 15 stress 0.1164511 
## ... Procrustes: rmse 0.0002639791  max resid 0.001356877 
## ... Similar to previous best
## Run 16 stress 0.1164499 
## ... Procrustes: rmse 1.673567e-06  max resid 6.01596e-06 
## ... Similar to previous best
## Run 17 stress 0.1164511 
## ... Procrustes: rmse 0.000264147  max resid 0.001350813 
## ... Similar to previous best
## Run 18 stress 0.1164499 
## ... Procrustes: rmse 2.231739e-06  max resid 1.00423e-05 
## ... Similar to previous best
## Run 19 stress 0.1164499 
## ... Procrustes: rmse 1.737791e-06  max resid 4.548443e-06 
## ... Similar to previous best
## Run 20 stress 0.1164499 
## ... Procrustes: rmse 3.567674e-06  max resid 1.473758e-05 
## ... Similar to previous best
## *** Solution reached
ordNMDS.k3 <- ordinate(biom.relabund, method="NMDS", distance="bray", k=3)
## Run 0 stress 0.07869986 
## Run 1 stress 0.07870031 
## ... Procrustes: rmse 6.12514e-05  max resid 0.0002914955 
## ... Similar to previous best
## Run 2 stress 0.08803557 
## Run 3 stress 0.08338382 
## Run 4 stress 0.08334049 
## Run 5 stress 0.08696056 
## Run 6 stress 0.07869932 
## ... New best solution
## ... Procrustes: rmse 0.0008157553  max resid 0.003206486 
## ... Similar to previous best
## Run 7 stress 0.08695914 
## Run 8 stress 0.07883936 
## ... Procrustes: rmse 0.004642341  max resid 0.02353269 
## Run 9 stress 0.07869838 
## ... New best solution
## ... Procrustes: rmse 0.0001950192  max resid 0.0006709648 
## ... Similar to previous best
## Run 10 stress 0.07869807 
## ... New best solution
## ... Procrustes: rmse 0.000218397  max resid 0.0006309467 
## ... Similar to previous best
## Run 11 stress 0.07870055 
## ... Procrustes: rmse 0.0005644573  max resid 0.001874261 
## ... Similar to previous best
## Run 12 stress 0.08337021 
## Run 13 stress 0.07869866 
## ... Procrustes: rmse 0.0002681414  max resid 0.0007798339 
## ... Similar to previous best
## Run 14 stress 0.08338119 
## Run 15 stress 0.07869851 
## ... Procrustes: rmse 0.0002175794  max resid 0.0005823668 
## ... Similar to previous best
## Run 16 stress 0.07870038 
## ... Procrustes: rmse 0.0005566319  max resid 0.002503063 
## ... Similar to previous best
## Run 17 stress 0.07869835 
## ... Procrustes: rmse 0.000196876  max resid 0.0008596606 
## ... Similar to previous best
## Run 18 stress 0.0786981 
## ... Procrustes: rmse 0.0001248532  max resid 0.0004052929 
## ... Similar to previous best
## Run 19 stress 0.08337779 
## Run 20 stress 0.07869969 
## ... Procrustes: rmse 0.000444327  max resid 0.002138182 
## ... Similar to previous best
## *** Solution reached
ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "DateSite") + geom_point(size=3) + geom_polygon(aes(fill=DateSite), alpha=0.7)
ord.k3 + labs(title = "Cladophora, 2014") + theme_bw()

# Ordination plot in black and white. 
ord.k3.bw <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", label = "Date") + geom_point(size=2) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.bw + labs(title = "Cladophora, 2014") + theme_bw()

# Facet by Date. 
ord.k3.facet1 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site") + geom_point(size=2.5) + facet_wrap(~Date) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.facet1 + labs(title = "Cladophora, 2014") + theme_bw()

# Facet by Site. 
ord.k3.facet2 <- plot_ordination(biom.relabund, ordNMDS.k3, color = "Date") + geom_point(size=2.5) + facet_wrap(~Site, ncol = 1) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.facet2 + labs(title = "Cladophora, 2014") + theme_bw()

# Find methanotrophs
methanolist <- read.table(file = "methanos.txt")
methanolist <- as.vector(methanolist$V1)
# 
biom.relabund.methanos <- subset_taxa(biom.relabund, Genus %in% as.factor(methanolist))
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 567 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 567 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
head(tax_table(biom.relabund.methanos))
## Taxonomy Table:     [6 taxa by 8 taxonomic ranks]:
##                                Kingdom    Phylum          
## AB240506.1.1496                "Bacteria" "Proteobacteria"
## KC432108.1.1352                "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU247624 "Bacteria" "Proteobacteria"
## AJ422152.1.1406                "Bacteria" "Proteobacteria"
## AB989868.1.1453                "Bacteria" "Proteobacteria"
## AJ422161.1.1420                "Bacteria" "Proteobacteria"
##                                Class                Order            
## AB240506.1.1496                "Betaproteobacteria" "Burkholderiales"
## KC432108.1.1352                "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU247624 "Betaproteobacteria" "Burkholderiales"
## AJ422152.1.1406                "Betaproteobacteria" "Burkholderiales"
## AB989868.1.1453                "Betaproteobacteria" "Burkholderiales"
## AJ422161.1.1420                "Betaproteobacteria" "Burkholderiales"
##                                Family           Genus         Species
## AB240506.1.1496                "Comamonadaceae" "Methylibium" NA     
## KC432108.1.1352                "Comamonadaceae" "Methylibium" NA     
## New.CleanUp.ReferenceOTU247624 "Comamonadaceae" "Methylibium" NA     
## AJ422152.1.1406                "Comamonadaceae" "Methylibium" NA     
## AB989868.1.1453                "Comamonadaceae" "Methylibium" NA     
## AJ422161.1.1420                "Comamonadaceae" "Methylibium" NA     
##                                Rank1
## AB240506.1.1496                NA   
## KC432108.1.1352                NA   
## New.CleanUp.ReferenceOTU247624 NA   
## AJ422152.1.1406                NA   
## AB989868.1.1453                NA   
## AJ422161.1.1420                NA
# 
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.methanos.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date          Family
##    <chr>          <dbl>         <fctr> <fctr> <fctr>          <fctr>
## 1 C178S2      0.1472802          Early  South    178 Crenotrichaceae
## 2 C199S1      0.1240842           Late  South    199 Crenotrichaceae
## 3 C185S1      0.1272864          Early  South    185 Crenotrichaceae
## 4 C199S3      0.1048677           Late  South    199 Crenotrichaceae
## 5 C185S2      0.1128401          Early  South    185 Crenotrichaceae
## 6 C178S1      0.1214166          Early  South    178 Crenotrichaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# 
p <- ggplot(relabund.methanos.genus, aes(as.factor(Date), GenusAbundance, color = Site))
p <- p + geom_point() + facet_wrap(~Genus, scales="free_y")
p

# Stacked bar plots of methanotrophs. 
barstack.methanos <- plot_bar(biom.relabund.methanos, x = "Date", fill="Site") + facet_wrap(~Genus, scales="free_y")
barstack.methanos

relabund.methanos.genus.est <- summarySE(relabund.methanos.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.methanos.genus.est)
##    Site Date            Genus N GenusAbundance           sd           se
## 1 North  172       Crenothrix 3   8.092387e-04 4.823472e-04 2.784833e-04
## 2 North  172      Methylibium 3   6.600147e-04 3.800437e-04 2.194183e-04
## 3 North  172    Methylocaldum 3   7.717447e-05 5.988775e-05 3.457621e-05
## 4 North  172 Methylomicrobium 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172     Methylomonas 3   1.097031e-04 2.736128e-06 1.579704e-06
## 6 North  172     Methylosinus 3   4.677898e-05 2.432451e-05 1.404376e-05
##             ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
relabund.methanos.genus.est$Date <- as.character(relabund.methanos.genus.est$Date)
relabund.methanos.genus.est$Date <- as.numeric(relabund.methanos.genus.est$Date)
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_smooth(method="lm", se=TRUE) + facet_wrap(~Genus~Site, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# https://joey711.github.io/phyloseq/plot_richness-examples.html
# Plot richness. 
biom.rich <- plot_richness(biom, x="Date", color="Site", measures=c("Observed", "Chao1", "Shannon"))
biom.rich + geom_point(size=2, alpha=0.6)

biom.est.rich <- estimate_richness(biom, measures = NULL)
biom.est.rich$SampleID.1 <- row.names(biom.est.rich)
biom.est.rich <- merge(biom.est.rich, sam.data, by = "SampleID.1")
biom.est.rich$Date <- as.character(biom.est.rich$Date)
biom.est.rich$Date <- as.numeric(biom.est.rich$Date)
biom.est.rich.obs <- summarySE(biom.est.rich, measurevar="Observed", groupvars=c("Date","Site"))
str(biom.est.rich.obs)
## 'data.frame':    18 obs. of  7 variables:
##  $ Date    : num  172 172 172 178 178 178 185 185 185 199 ...
##  $ Site    : Factor w/ 3 levels "North","Point",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ N       : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ Observed: num  5755 3840 4230 4371 4199 ...
##  $ sd      : num  984 952 161 688 376 ...
##  $ se      : num  568.2 549.4 93.1 397.2 217.2 ...
##  $ ci      : num  2445 2364 400 1709 934 ...
p.obs <- ggplot(biom.est.rich.obs, aes(x=Date, y=Observed)) + geom_point() +  geom_errorbar(aes(ymin=Observed-se, ymax=Observed+se)) + geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.obs + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

biom.est.rich.sha <- summarySE(biom.est.rich, measurevar="Shannon", groupvars=c("Date","Site")); biom.est.rich.sha
##    Date  Site N  Shannon         sd          se         ci
## 1   172 North 3 5.691512 0.27015157 0.155972080 0.67109370
## 2   172 Point 3 4.994391 0.27100848 0.156466818 0.67322238
## 3   172 South 3 5.289918 0.05771106 0.033319494 0.14336221
## 4   178 North 3 5.239205 0.06234512 0.035994972 0.15487386
## 5   178 Point 3 5.189739 0.05970346 0.034469806 0.14831161
## 6   178 South 3 5.275200 0.03777686 0.021810483 0.09384293
## 7   185 North 3 5.278452 0.08534700 0.049275111 0.21201369
## 8   185 Point 3 5.379744 0.14580223 0.084178957 0.36219282
## 9   185 South 3 5.395927 0.01475526 0.008518956 0.03665411
## 10  199 North 3 5.464040 0.10460268 0.060392385 0.25984746
## 11  199 Point 3 5.555742 0.04974626 0.028721015 0.12357655
## 12  199 South 3 5.709314 0.06839587 0.039488375 0.16990477
## 13  206 North 3 5.509243 0.63523457 0.366752847 1.57801014
## 14  206 Point 2 5.777171 0.07811922 0.055238633 0.70187338
## 15  206 South 2 6.029796 0.10170518 0.071916421 0.91378477
## 16  214 North 3 5.606372 0.04046684 0.023363538 0.10052519
## 17  214 Point 3 5.740453 0.03729225 0.021530691 0.09263909
## 18  214 South 3 6.435686 0.05378621 0.031053481 0.13361234
p.sha <- ggplot(biom.est.rich.sha, aes(x=Date, y=Shannon)) + geom_point() +  geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) +  geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.sha + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

biom.est.rich.ch1 <- summarySE(biom.est.rich, measurevar="Chao1", groupvars=c("Date","Site")); biom.est.rich.ch1
##    Date  Site N    Chao1        sd        se        ci
## 1   172 North 3 7429.346  935.0547 539.85409 2322.8047
## 2   172 Point 3 5666.254 1199.0783 692.28819 2978.6757
## 3   172 South 3 6360.755  398.6771 230.17633  990.3688
## 4   178 North 3 6700.207 1222.0559 705.55428 3035.7551
## 5   178 Point 3 6664.523  453.2735 261.69757 1125.9938
## 6   178 South 3 6917.707  230.6817 133.18414  573.0451
## 7   185 North 3 6346.119  309.1132 178.46659  767.8797
## 8   185 Point 3 6802.057  493.3480 284.83460 1225.5444
## 9   185 South 3 6394.629  463.1020 267.37204 1150.4091
## 10  199 North 3 6660.997  383.5766 221.45806  952.8571
## 11  199 Point 3 7173.874  299.3104 172.80691  743.5281
## 12  199 South 3 7870.023  384.8040 222.16671  955.9062
## 13  206 North 3 8351.495  741.9579 428.36959 1843.1256
## 14  206 Point 2 8458.037  691.5015 488.96543 6212.8949
## 15  206 South 2 8495.197  800.6637 566.15470 7193.6776
## 16  214 North 3 7347.579 1190.2184 687.17294 2956.6665
## 17  214 Point 3 7762.503  126.9796  73.31169  315.4347
## 18  214 South 3 8723.704 1065.9220 615.41037 2647.8971
p.ch1 <- ggplot(biom.est.rich.ch1, aes(x=Date, y=Chao1)) + geom_point() +  geom_errorbar(aes(ymin=Chao1-se, ymax=Chao1+se)) +  geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.ch1 + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

library("vegan"); packageVersion("vegan")
## [1] '2.4.1'
# Anosim by site. 
site_group = get_variable(biom, "Site")
site_ano = anosim(phyloseq::distance(biom, "bray"), site_group)
site_ano
## 
## Call:
## anosim(dat = phyloseq::distance(biom, "bray"), grouping = site_group) 
## Dissimilarity: bray 
## 
## ANOSIM statistic R: 0.3419 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
# Anosim by date. 
date_group = get_variable(biom, "Date")
date_ano = anosim(phyloseq::distance(biom, "bray"), date_group)
date_ano
## 
## Call:
## anosim(dat = phyloseq::distance(biom, "bray"), grouping = date_group) 
## Dissimilarity: bray 
## 
## ANOSIM statistic R: 0.6204 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
# 
df = as(sample_data(biom), "data.frame")
d = phyloseq::distance(biom, "bray")
clado.adonis = adonis(d ~ Date + Site + Date*Site, df)
clado.adonis
## 
## Call:
## adonis(formula = d ~ Date + Site + Date * Site, data = df) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Date       5    3.1945 0.63890 17.4143 0.41338  0.001 ***
## Site       2    1.5561 0.77804 21.2068 0.20136  0.001 ***
## Date:Site 10    1.7298 0.17298  4.7148 0.22384  0.001 ***
## Residuals 34    1.2474 0.03669         0.16142           
## Total     51    7.7278                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
top.genera = sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum), TRUE)
top.genera = top.genera[1:25]
# Prune to just the most-abundant genera. 
biom.relabund.subset = subset_taxa(biom.relabund, Genus %in% names(top.genera))
get_taxa_unique(biom.relabund.subset, "Genus")
##  [1] "Hydrogenophaga"    "Cellvibrio"        "Pseudomonas"      
##  [4] "Crenothrix"        "Methylotenera"     "Dechloromonas"    
##  [7] "Lysobacter"        "Hyphomicrobium"    "Rhodobacter"      
## [10] "Planctomyces"      "Pseudanabaena"     "Leptolyngbya"     
## [13] "Synechococcus"     "Bdellovibrio"      "Armatimonas"      
## [16] "Sediminibacterium" "Flavobacterium"    "Fluviicola"       
## [19] "Flectobacillus"    "Haliscomenobacter" "Leadbetterella"   
## [22] "Emticicia"         "Hyphomonas"        "Meiothermus"      
## [25] "CM44"
# Find top phyla. 
top.genera.list <- as.vector(names(top.genera))
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Genus %in% as.factor(top.genera.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8747 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 8747 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 8747 tips and 8743 internal nodes ]
head(tax_table(biom.relabund.subset.taxa))
## Taxonomy Table:     [6 taxa by 8 taxonomic ranks]:
##                                Kingdom    Phylum          
## New.CleanUp.ReferenceOTU269444 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU117342 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU139696 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU449979 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU112788 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU455978 "Bacteria" "Proteobacteria"
##                                Class                Order            
## New.CleanUp.ReferenceOTU269444 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU117342 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU139696 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU449979 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU112788 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU455978 "Betaproteobacteria" "Burkholderiales"
##                                Family           Genus            Species
## New.CleanUp.ReferenceOTU269444 "Comamonadaceae" "Hydrogenophaga" NA     
## New.CleanUp.ReferenceOTU117342 "Comamonadaceae" "Hydrogenophaga" NA     
## New.CleanUp.ReferenceOTU139696 "Comamonadaceae" "Hydrogenophaga" NA     
## New.CleanUp.ReferenceOTU449979 "Comamonadaceae" "Hydrogenophaga" NA     
## New.CleanUp.ReferenceOTU112788 "Comamonadaceae" "Hydrogenophaga" NA     
## New.CleanUp.ReferenceOTU455978 "Comamonadaceae" "Hydrogenophaga" NA     
##                                Rank1
## New.CleanUp.ReferenceOTU269444 NA   
## New.CleanUp.ReferenceOTU117342 NA   
## New.CleanUp.ReferenceOTU139696 NA   
## New.CleanUp.ReferenceOTU449979 NA   
## New.CleanUp.ReferenceOTU112788 NA   
## New.CleanUp.ReferenceOTU455978 NA
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.top.genera.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date          Family
##    <chr>          <dbl>         <fctr> <fctr> <fctr>          <fctr>
## 1 C178S2     0.14728018          Early  South    178 Crenotrichaceae
## 2 C206N2     0.07541493           Late  North    206      Thermaceae
## 3 C199S1     0.12408425           Late  South    199 Crenotrichaceae
## 4 C185S1     0.12728636          Early  South    185 Crenotrichaceae
## 5 C199S3     0.10486769           Late  South    199 Crenotrichaceae
## 6 C172S1     0.10894857          Early  South    172   Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Plot. 
relabund.top.genera.genus.est <- summarySE(relabund.top.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.top.genera.genus.est)
##    Site Date         Genus N GenusAbundance           sd           se
## 1 North  172   Armatimonas 3   0.0082179013 0.0037601213 0.0021709070
## 2 North  172  Bdellovibrio 3   0.0023540936 0.0001963925 0.0001133873
## 3 North  172    Cellvibrio 3   0.0058399987 0.0066066072 0.0038143264
## 4 North  172          CM44 3   0.0088329518 0.0005298073 0.0003058844
## 5 North  172    Crenothrix 3   0.0008092387 0.0004823472 0.0002784833
## 6 North  172 Dechloromonas 3   0.0019270556 0.0026318679 0.0015195096
##             ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
relabund.top.genera.genus.est$Date <- as.character(relabund.top.genera.genus.est$Date)
relabund.top.genera.genus.est$Date <- as.numeric(relabund.top.genera.genus.est$Date)
p <- ggplot(relabund.top.genera.genus.est, aes(x=Date, y=GenusAbundance)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_smooth(method="lm", se=TRUE) + facet_wrap(~Genus~Site, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))